library(readstata13)

# Load the extrapolated population table from its RDS file. Further down it is
# right-joined onto the census age shares, and its year, Population and
# predicted columns are carried into population_age_structure.
Population_data_extrapolated <- readRDS("Results/Population_data_extrapolated.rds")


# Map an age in years to the index of its 5-year age group.
#
# Groups are 1 = below 5, 2 = [5, 10), ..., 19 = [90, 95), 20 = 95 and over.
# Negative ages fall in group 1.
#
# age_years: numeric vector, or a difftime whose numeric value is in years
#            (the caller passes Age_at_death / 365.25).
# Returns:   integer vector of the same length as age_years; NA input gives
#            NA_integer_ (the previous which.min() form returned integer(0)
#            for NA, which turned Vectorize() output into a list).
#
# NOTE(review): groups 18-20 have no counterpart in
# age_group_name_age_group_dict, which caps at 17 -- confirm that callers
# collapse them before matching deaths to population.
compute_age_group <- function(age_years) {
  lower_bounds <- seq(5, 95, by = 5)
  # findInterval() counts how many lower bounds are <= age; adding 1 turns
  # that count into the 1-based group index. It is vectorised and propagates
  # NA, so the function is safe on whole columns as well as scalars.
  findInterval(as.numeric(age_years), lower_bounds) + 1L
}

# Lookup from census age-band label to age-class index. The sixteen 5-year
# bands "0-4" ... "75-79" map to 1..16; every band from "80-84" upwards, and
# the open-ended labels "etplus" and "80+", map to class 17.
nms = paste0(seq(0, 90, by = 5), "-", seq(4, 94, by = 5))
age_group_name_age_group_dict = local({
  band_positions = seq_along(nms)
  # Bands below position 17 keep their own (integer) position as class index;
  # the remaining bands and the two open-ended labels all share class 17.
  class_indices = c(
    as.list(band_positions[band_positions < 17]),
    rep(list(17), sum(band_positions >= 17)),
    list(17, 17)
  )
  names(class_indices) = c(nms, "etplus", "80+")
  class_indices
})

# Translate census age-band labels into age-class indices.
#
# age_group_name: vector of labels (e.g. "0-4", "80+"); coerced to character.
# Returns:        double vector of the same length, named by the input labels
#                 (as before). Labels that are NA or absent from
#                 age_group_name_age_group_dict give NA.
#
# The lookup table age_group_name_age_group_dict is read from the enclosing
# environment at call time.
#
# vapply() replaces sapply() so the result type no longer depends on the
# input: sapply() returned a list for unknown labels or empty input, and an
# integer or double vector depending on which labels were present.
age_group_name_age_group_converter <- function(age_group_name) {
  labels <- as.character(age_group_name)
  vapply(
    labels,
    function(nm) {
      class_index <- if (is.na(nm)) NULL else age_group_name_age_group_dict[[nm]]
      if (is.null(class_index)) NA_real_ else as.numeric(class_index)
    },
    FUN.VALUE = numeric(1)
  )
}

# Named list translating the twenty codes "75056SR01" ... "75056SR20" into the
# codes "75101" ... "75120": each name is the code to be replaced and each
# value the code that replaces it.
#
# Built with vectorised base functions (sprintf/paste0) instead of element-wise
# lapply() calls, so this constant no longer needs stringr or magrittr.
weird_code_to_INSEE_code = local({
  ardt_suffix = sprintf("%02d", 1:20)  # "01", "02", ..., "20"
  setNames(
    as.list(paste0("751", ardt_suffix)),
    paste0("75056SR", ardt_suffix)
  )
})

# Long table of population counts by Code, year, sex and age class.
#
# Steps: read the census file and keep Code plus every column ending in
# "RP2016"; recode the codes listed in weird_code_to_INSEE_code; turn each
# row's RP2016 columns into shares of the row total; right-join onto
# Population_data_extrapolated; multiply the shares by Population and round;
# shorten the column names; reshape to one row per (Code, year, sex, age
# band); map the band label to age_class; drop codes starting with "97";
# replace missing populations by 0.
population_age_structure = readstata13::read.dta13("Data/Census_data.dta") %>% 
  select(Code, ends_with("RP2016")) %>% 
  # Replace codes that appear as names of weird_code_to_INSEE_code by the
  # mapped value. ifelse() with a list as `yes` returns a list, hence unlist().
  mutate(Code = ifelse(Code %in% names(weird_code_to_INSEE_code), yes = weird_code_to_INSEE_code[Code], no = Code) %>% unlist()) %>% 
  (function(df){
    # Divide every RP2016 column by the row total, giving shares per row, then
    # put Code back in front. A row whose total is 0 yields NaN shares (0/0).
    df %>% 
      select(-Code) %>% 
      (function(ddf) ddf/rowSums(ddf)) %>% 
      as_tibble() %>% 
      bind_cols(df %>% select(Code), .)
  }) %>% 
  # No `by` is given, so the join uses every column name the two tables share
  # (presumably only Code -- TODO confirm). Being a right join, it keeps every
  # row of Population_data_extrapolated; rows without census shares get NA.
  right_join(Population_data_extrapolated) %>% 
  (function(df){
    # Scale the shares of row i by that row's Population and round to whole
    # persons (a data frame times a vector of length nrow works row-wise).
    df %>% 
      select(ends_with("RP2016")) %>% 
      (function(ddf) ddf*df$Population) %>%
      round() %>% 
      as_tibble() %>% 
      bind_cols(df %>% select(Code, year, Population, predicted), .)
  }) %>% 
  as_tibble %>% 
  rename(population_total = Population) %>% 
  # Shorten the column names with successive plain-substring replacements
  # applied to EVERY column name: "Hommes" -> "M", "Femmes" -> "F", and
  # "RP2016", "ans", "De" removed, "à" -> "-". The order matters, and the
  # identifier columns must not contain any of these substrings.
  # NOTE(review): a name such as "De0à4ansHommesRP2016" would become "0-4M" --
  # the actual census column names are not visible here, TODO confirm.
  (function(df) {df %>% setNames(df %>% names %>% gsub("Hommes", "M", .))}) %>% 
  (function(df) {df %>% setNames(df %>% names %>% gsub("Femmes", "F", .))}) %>% 
  (function(df) {df %>% setNames(df %>% names %>% gsub("RP2016", "", .))}) %>% 
  (function(df) {df %>% setNames(df %>% names %>% gsub("ans", "", .))}) %>% 
  (function(df) {df %>% setNames(df %>% names %>% gsub("De", "", .))}) %>% 
  (function(df) {df %>% setNames(df %>% names %>% gsub("à", "-", .))}) %>% 
  (function(df){
    # Reshape to long format separately for the "...M" and "...F" columns, tag
    # each half with its sex, and stack male rows above female rows.
    # NOTE(review): contains()/ends_with() ignore case by default, so
    # contains("M") also matches names containing a lower-case "m". None of
    # Code, year, population_total or predicted contains "m" or "f", so only
    # the age columns are selected here.
    df %>% 
      select(Code, year, population_total, predicted, contains("M")) %>% 
      gather(age_class_name, population, ends_with("M")) %>% 
      mutate(sex = "male") %>% 
      bind_rows(df %>% 
                  select(Code, year, population_total, predicted, contains("F")) %>% 
                  gather(age_class_name, population, ends_with("F")) %>% 
                  mutate(sex = "female"))
  }) %>% 
  # Strip the sex marker so that only the age-band label remains.
  mutate(age_class_name = gsub("M", "", age_class_name)) %>% 
  mutate(age_class_name = gsub("F", "", age_class_name)) %>% 
  # Look the label up in age_group_name_age_group_dict.
  # NOTE(review): a label missing from that dictionary makes the sapply()-based
  # converter return a list instead of a vector -- verify all labels are covered.
  mutate(age_class = age_group_name_age_group_converter(age_class_name)) %>% 
  # The first two characters of Code are used as the département identifier.
  mutate(Département = str_sub(string = Code, start = 1, end = 2)) %>% 
  filter(Département != "97") %>% # No census data available for DOM/TOM
  # Missing counts (no census match in the join, or NaN shares from a zero row
  # total) are set to 0; is.na() is TRUE for NaN as well.
  mutate(population = ifelse(is.na(population), yes = 0, no = population)) 

# Individual death records with parsed birth/death dates, age at death,
# 5-year age class and sex label.
#
# Starting from Municipality_data, rows are dropped when the birth or death
# year is missing, when the day of death, day of birth or month of birth is 0,
# when the death year is the empty string, or when the death year is before
# 1900. Dates are then parsed with lubridate::ymd(), the age at death is the
# difference between the two dates, and age_class comes from
# compute_age_group() applied to the age in years (days / 365.25).
# Sexe == 1 is labelled "male", anything else "female".
Municipality_data_with_age_group = Municipality_data %>%
  filter(!is.na(AnnéeNaissance)) %>% # Excluding 15232 records
  filter(!is.na(AnnéeDécès)) %>%
  filter(as.numeric(JourDécès) != 0) %>% # excluding further 6 records
  filter(as.numeric(JourNaissance) != 0) %>% # excluding further 4267 records
  filter(as.numeric(MoisNaissance) != 0) %>% # excluding 15238 death records
  filter(AnnéeDécès != "") %>% # Filtering what is actually population data
  # Compare the year as a number. If AnnéeDécès is stored as text (the test
  # against "" above suggests it may be), `AnnéeDécès >= 1900` is a string
  # comparison and "201" >= "1900" is TRUE, so the malformed record would be
  # kept. as.character() first keeps this correct for factor columns too.
  filter(as.numeric(as.character(AnnéeDécès)) >= 1900) %>% # excluding one record where DeathDate = 201
  mutate(
    Birth_date = lubridate::ymd(paste(AnnéeNaissance, MoisNaissance, JourNaissance, sep = "/")),
    Death_date = lubridate::ymd(paste(AnnéeDécès, MoisDécès, JourDécès, sep = "/"))
  ) %>%
  mutate(Age_at_death = Death_date - Birth_date) %>% 
  # vapply() guarantees an integer column: a date that failed to parse gives
  # an NA age, which becomes NA_integer_ here instead of turning the whole
  # column into a list (as Vectorize() did when compute_age_group() returned
  # a zero-length result).
  mutate(age_class = vapply(
    as.numeric(Age_at_death, units = "days") / 365.25,
    function(age_years) {
      group <- compute_age_group(age_years)
      if (length(group) == 1) as.integer(group) else NA_integer_
    },
    FUN.VALUE = integer(1)
  )) %>% 
  mutate(sex = ifelse(test = Sexe == 1, yes = "male", no = "female"))

# Count deaths per municipality, age class and sex within a date window.
#
# start_date, end_date: bounds of the window, both inclusive, compared against
#                       the Death_date column.
# Returns: one row per (Municipality_code, age_class, sex) combination found
#          in Municipality_data_with_age_group, with the number of death
#          records in NbDeath. Municipality_code is a copy of Code.
mortality_period_municipality_level_age_sex <- function(start_date, end_date) {
  deaths_in_window <- filter(
    Municipality_data_with_age_group,
    Death_date >= start_date,
    Death_date <= end_date
  )
  deaths_by_stratum <- group_by(
    deaths_in_window,
    Municipality_code = Code,
    age_class,
    sex
  )
  summarise(deaths_by_stratum, NbDeath = n())
}